{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Kashgari NER Benchmarks\n",
    "\n",
    "- Kashgari: 2.1.0\n",
    "- TensorFLow: 2.1.0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare Environment and Data\n",
    "\n",
    "Download Embeddings to Embddings Folder and unzip.\n",
    "- [BERT-Base, Chinese](https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip)\n",
    "\n",
    "Requirements:\n",
    "\n",
    "- Kashgari: 2.1.0\n",
    "- TensorFLow: 2.1.0\n",
    "\n",
    "Macros:\n",
    "\n",
    "- SEQUENCE_LENGTH: training sequence length"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip uninstall -y kashgari\n",
    "!pip install git+https://github.com/BrikerMan/Kashgari.git@kashgari2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Setup macros\n",
    "SEQUENCE_LENGTH = 200\n",
    "EPOCHS = 30\n",
    "EARL_STOPPING_PATIENCE = 5\n",
    "REDUCE_RL_PATIENCE = 5\n",
    "\n",
    "BATCH_SIZE = 64\n",
    "\n",
    "EMBEDDING_FOLDER = './embeddings'\n",
    "TF_LOG_FOLDER = './tf_dir'\n",
    "LOG_FILE_PATH = './ner_training_log.json'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "EMBEDDING_FOLDER = '/Users/brikerman/Desktop/kashgari-demo/embeddings'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "DOWNLOAD_DATASET = False\n",
    "\n",
    "if DOWNLOAD_DATASET:\n",
    "    !mkdir embeddings\n",
    "    # Download BERT\n",
    "    !wget https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip\n",
    "    !unzip embeddings/chinese_L-12_H-768_A-12.zip\n",
    "    !mv chinese_L-12_H-768_A-12 embeddings/chinese_L-12_H-768_A-12\n",
    "    # Download Albert-Tiny-Google\n",
    "    !wget https://storage.googleapis.com/albert_zh/albert_tiny_zh_google.zip\n",
    "    !unzip albert_tiny_zh_google.zip\n",
    "    !mv albert_tiny_zh_google embeddings/albert_tiny_zh_google"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "from kashgari.corpus import ChineseDailyNerCorpus\n",
    "\n",
    "train_x, train_y = ChineseDailyNerCorpus.load_data('train')\n",
    "test_x, test_y = ChineseDailyNerCorpus.load_data('test')\n",
    "valid_x, valid_y = ChineseDailyNerCorpus.load_data('valid')\n",
    "\n",
    "train_x = train_x[:100]\n",
    "train_y = train_y[:100]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "from tensorflow import keras\n",
    "from kashgari.tasks.labeling import BiLSTM_Model, BiLSTM_CRF_Model\n",
    "from kashgari.tasks.labeling import BiGRU_Model, BiGRU_CRF_Model\n",
    "from kashgari.callbacks import EvalCallBack\n",
    "\n",
    "from kashgari.embeddings import WordEmbedding, BertEmbedding\n",
    "from IPython.display import clear_output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "# voidful/albert_chinese_tiny https://github.com/brightmart/albert_zh\n",
    "\n",
    "albert_tiny_path = os.path.join(EMBEDDING_FOLDER, 'albert_tiny_zh_google')\n",
    "albert_tiny_vocab = os.path.join(albert_tiny_path, 'vocab.txt')\n",
    "albert_tiny_config = os.path.join(albert_tiny_path, 'albert_config_tiny_g.json')\n",
    "albert_tiny_checkpoint = os.path.join(albert_tiny_path, 'albert_model.ckpt')\n",
    "albert_tiny = TransformerEmbedding(vocab_path=albert_tiny_vocab,\n",
    "                                   config_path=albert_tiny_config,\n",
    "                                   checkpoint_path=albert_tiny_checkpoint,\n",
    "                                   model_type='albert')\n",
    "\n",
    "# Google Bert\n",
    "bert_chinese = BertEmbedding(os.path.join(EMBEDDING_FOLDER, 'chinese_L-12_H-768_A-12'))\n",
    "\n",
    "embeddings = [\n",
    "    # ('Tiny-Albert-Chinse', albert_tiny),\n",
    "    # ('Bert-Chinese', bert_chinese),\n",
    "    ('Bare', None)\n",
    "]\n",
    "\n",
    "model_classes = [\n",
    "    ('BiLSTM', BiLSTM_Model), \n",
    "    ('BiLSTM_CRF', BiLSTM_CRF_Model), \n",
    "    ('BiGRU', BiGRU_Model), \n",
    "    ('BiGRU_CRF', BiGRU_CRF_Model)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "tags": [
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend",
     "outputPrepend"
    ]
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Preparing text vocab dict: 100%|██████████| 200/200 [00:00<00:00, 56337.19it/s]\n",
      "Model: \"model_41\"\n",
      "__________________________________________________________________________________________________\n",
      "Layer (type)                    Output Shape         Param #     Connected to                     \n",
      "==================================================================================================\n",
      "Input-Token (InputLayer)        [(None, None)]       0                                            \n",
      "__________________________________________________________________________________________________\n",
      "Input-Segment (InputLayer)      [(None, None)]       0                                            \n",
      "__________________________________________________________________________________________________\n",
      "Embedding-Token (Embedding)     (None, None, 128)    2704384     Input-Token[0][0]                \n",
      "__________________________________________________________________________________________________\n",
      "Embedding-Segment (Embedding)   (None, None, 128)    256         Input-Segment[0][0]              \n",
      "__________________________________________________________________________________________________\n",
      "Embedding-Token-Segment (Add)   (None, None, 128)    0           Embedding-Token[0][0]            \n",
      "                                                                 Embedding-Segment[0][0]          \n",
      "__________________________________________________________________________________________________\n",
      "Embedding-Position (PositionEmb (None, None, 128)    65536       Embedding-Token-Segment[0][0]    \n",
      "__________________________________________________________________________________________________\n",
      "Embedding-Norm (LayerNormalizat (None, None, 128)    256         Embedding-Position[0][0]         \n",
      "__________________________________________________________________________________________________\n",
      "Embedding-Mapping (Dense)       (None, None, 312)    40248       Embedding-Norm[0][0]             \n",
      "__________________________________________________________________________________________________\n",
      "Transformer-MultiHeadSelfAttent (None, None, 312)    390624      Embedding-Mapping[0][0]          \n",
      "                                                                 Embedding-Mapping[0][0]          \n",
      "                                                                 Embedding-Mapping[0][0]          \n",
      "                                                                 Transformer-FeedForward-Norm[0][0\n",
      "                                                                 Transformer-FeedForward-Norm[0][0\n",
      "                                                                 Transformer-FeedForward-Norm[0][0\n",
      "                                                                 Transformer-FeedForward-Norm[1][0\n",
      "                                                                 Transformer-FeedForward-Norm[1][0\n",
      "                                                                 Transformer-FeedForward-Norm[1][0\n",
      "                                                                 Transformer-FeedForward-Norm[2][0\n",
      "                                                                 Transformer-FeedForward-Norm[2][0\n",
      "                                                                 Transformer-FeedForward-Norm[2][0\n",
      "__________________________________________________________________________________________________\n",
      "Transformer-MultiHeadSelfAttent (None, None, 312)    0           Embedding-Mapping[0][0]          \n",
      "                                                                 Transformer-MultiHeadSelfAttentio\n",
      "                                                                 Transformer-FeedForward-Norm[0][0\n",
      "                                                                 Transformer-MultiHeadSelfAttentio\n",
      "                                                                 Transformer-FeedForward-Norm[1][0\n",
      "                                                                 Transformer-MultiHeadSelfAttentio\n",
      "                                                                 Transformer-FeedForward-Norm[2][0\n",
      "                                                                 Transformer-MultiHeadSelfAttentio\n",
      "__________________________________________________________________________________________________\n",
      "Transformer-MultiHeadSelfAttent (None, None, 312)    624         Transformer-MultiHeadSelfAttentio\n",
      "                                                                 Transformer-MultiHeadSelfAttentio\n",
      "                                                                 Transformer-MultiHeadSelfAttentio\n",
      "                                                                 Transformer-MultiHeadSelfAttentio\n",
      "__________________________________________________________________________________________________\n",
      "Transformer-FeedForward (FeedFo (None, None, 312)    780312      Transformer-MultiHeadSelfAttentio\n",
      "                                                                 Transformer-MultiHeadSelfAttentio\n",
      "                                                                 Transformer-MultiHeadSelfAttentio\n",
      "                                                                 Transformer-MultiHeadSelfAttentio\n",
      "__________________________________________________________________________________________________\n",
      "Transformer-FeedForward-Add (Ad (None, None, 312)    0           Transformer-MultiHeadSelfAttentio\n",
      "                                                                 Transformer-FeedForward[0][0]    \n",
      "                                                                 Transformer-MultiHeadSelfAttentio\n",
      "                                                                 Transformer-FeedForward[1][0]    \n",
      "                                                                 Transformer-MultiHeadSelfAttentio\n",
      "                                                                 Transformer-FeedForward[2][0]    \n",
      "                                                                 Transformer-MultiHeadSelfAttentio\n",
      "                                                                 Transformer-FeedForward[3][0]    \n",
      "__________________________________________________________________________________________________\n",
      "Transformer-FeedForward-Norm (L (None, None, 312)    624         Transformer-FeedForward-Add[0][0]\n",
      "                                                                 Transformer-FeedForward-Add[1][0]\n",
      "                                                                 Transformer-FeedForward-Add[2][0]\n",
      "                                                                 Transformer-FeedForward-Add[3][0]\n",
      "__________________________________________________________________________________________________\n",
      "layer_blstm (Bidirectional)     (None, None, 256)    451584      Transformer-FeedForward-Norm[3][0\n",
      "__________________________________________________________________________________________________\n",
      "layer_dropout (Dropout)         (None, None, 256)    0           layer_blstm[0][0]                \n",
      "__________________________________________________________________________________________________\n",
      "dense_13 (Dense)                (None, None, 8)      2056        layer_dropout[0][0]              \n",
      "__________________________________________________________________________________________________\n",
      "activation_9 (Activation)       (None, None, 8)      0           dense_13[0][0]                   \n",
      "==================================================================================================\n",
      "Total params: 4,436,504\n",
      "Trainable params: 453,640\n",
      "Non-trainable params: 3,982,864\n",
      "__________________________________________________________________________________________________\n",
      "WARNING:tensorflow:sample_weight modes were coerced from\n",
      "  ...\n",
      "    to  \n",
      "  ['...']\n",
      "WARNING:tensorflow:sample_weight modes were coerced from\n",
      "  ...\n",
      "    to  \n",
      "  ['...']\n",
      "WARNING:tensorflow:sample_weight modes were coerced from\n",
      "  ...\n",
      "    to  \n",
      "  ['...']\n",
      "WARNING:tensorflow:sample_weight modes were coerced from\n",
      "  ...\n",
      "    to  \n",
      "  ['...']\n",
      "Train for 3 steps, validate for 36 steps\n",
      "Epoch 1/30\n",
      "2/3 [===================>..........] - ETA: 7s - loss: 0.3804 - accuracy: 0.4951WARNING:root:Sequence length is None, will use the max length of the samples, which is 512\n",
      "           precision    recall  f1-score   support\n",
      "\n",
      "      LOC     0.0000    0.0000    0.0000      1934\n",
      "      ORG     0.0000    0.0000    0.0000       984\n",
      "      PER     0.0000    0.0000    0.0000       884\n",
      "\n",
      "micro avg     0.0000    0.0000    0.0000      3802\n",
      "macro avg     0.0000    0.0000    0.0000      3802\n",
      "\n",
      "\n",
      "epoch: 0 precision: 0.000000, recall: 0.000000, f1-score: 0.000000\n",
      "3/3 [==============================] - 406s 135s/step - loss: 0.3062 - accuracy: 0.6364 - val_loss: 0.1285 - val_accuracy: 0.8911\n",
      "Epoch 2/30\n",
      "2/3 [===================>..........] - ETA: 1s - loss: 0.1385 - accuracy: 0.8807"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-81-1b8b10654130>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     25\u001b[0m                   \u001b[0mvalid_y\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     26\u001b[0m                   \u001b[0mcallbacks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 27\u001b[0;31m                   epochs=EPOCHS)\n\u001b[0m\u001b[1;32m     28\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     29\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mLOG_FILE_PATH\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/kashgari/tasks/labeling/abc_model.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x_train, y_train, x_validate, y_validate, batch_size, epochs, callbacks, fit_kwargs)\u001b[0m\n\u001b[1;32m     95\u001b[0m                                   \u001b[0mepochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mepochs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     96\u001b[0m                                   \u001b[0mcallbacks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 97\u001b[0;31m                                   fit_kwargs=fit_kwargs)\n\u001b[0m\u001b[1;32m     98\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     99\u001b[0m     def fit_generator(self,\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/kashgari/tasks/labeling/abc_model.py\u001b[0m in \u001b[0;36mfit_generator\u001b[0;34m(self, train_sample_gen, valid_sample_gen, batch_size, epochs, callbacks, fit_kwargs)\u001b[0m\n\u001b[1;32m    158\u001b[0m                                  \u001b[0mepochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mepochs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    159\u001b[0m                                  \u001b[0mcallbacks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 160\u001b[0;31m                                  **fit_kwargs)\n\u001b[0m\u001b[1;32m    161\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    162\u001b[0m     def predict(self,  # type: ignore[override]\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[1;32m    817\u001b[0m         \u001b[0mmax_queue_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmax_queue_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    818\u001b[0m         \u001b[0mworkers\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mworkers\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 819\u001b[0;31m         use_multiprocessing=use_multiprocessing)\n\u001b[0m\u001b[1;32m    820\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    821\u001b[0m   def evaluate(self,\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[1;32m    395\u001b[0m                       total_epochs=1)\n\u001b[1;32m    396\u001b[0m                   cbks.make_logs(model, epoch_logs, eval_result, ModeKeys.TEST,\n\u001b[0;32m--> 397\u001b[0;31m                                  prefix='val_')\n\u001b[0m\u001b[1;32m    398\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    399\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhistory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/.miniconda3/lib/python3.7/contextlib.py\u001b[0m in \u001b[0;36m__exit__\u001b[0;34m(self, type, value, traceback)\u001b[0m\n\u001b[1;32m    117\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mtype\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    118\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 119\u001b[0;31m                 \u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    120\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    121\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py\u001b[0m in \u001b[0;36mon_epoch\u001b[0;34m(self, epoch, mode)\u001b[0m\n\u001b[1;32m    769\u001b[0m       \u001b[0;32mif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mModeKeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTRAIN\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    770\u001b[0m         \u001b[0;31m# Epochs only apply to `fit`.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 771\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_epoch_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepoch_logs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    772\u001b[0m       \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprogbar\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_epoch_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepoch_logs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    773\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/tensorflow_core/python/keras/callbacks.py\u001b[0m in \u001b[0;36mon_epoch_end\u001b[0;34m(self, epoch, logs)\u001b[0m\n\u001b[1;32m    300\u001b[0m     \u001b[0mlogs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlogs\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    301\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mcallback\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 302\u001b[0;31m       \u001b[0mcallback\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_epoch_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    303\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    304\u001b[0m   \u001b[0;32mdef\u001b[0m \u001b[0mon_train_batch_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/kashgari/callbacks/eval_callBack.py\u001b[0m in \u001b[0;36mon_epoch_end\u001b[0;34m(self, epoch, logs)\u001b[0m\n\u001b[1;32m     49\u001b[0m                 \u001b[0;34m'recall'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mreport\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'recall'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     50\u001b[0m                 \u001b[0;34m'f1-score'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mreport\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'f1-score'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 51\u001b[0;31m             })\n\u001b[0m\u001b[1;32m     52\u001b[0m             print(f\"\\nepoch: {epoch} precision: {report['precision']:.6f},\"\n\u001b[1;32m     53\u001b[0m                   f\" recall: {report['recall']:.6f}, f1-score: {report['f1-score']:.6f}\")\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/kashgari/tasks/labeling/abc_model.py\u001b[0m in \u001b[0;36mevaluate\u001b[0;34m(self, x_data, y_data, batch_size, digits, truncating, debug_info)\u001b[0m\n\u001b[1;32m    281\u001b[0m                               \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    282\u001b[0m                               \u001b[0mtruncating\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtruncating\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 283\u001b[0;31m                               debug_info=debug_info)\n\u001b[0m\u001b[1;32m    284\u001b[0m         \u001b[0my_true\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mseq\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_pred\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mseq\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    285\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/kashgari/tasks/labeling/abc_model.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, x_data, batch_size, truncating, debug_info, predict_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m    194\u001b[0m                                                    \u001b[0mseq_lengtg\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mseq_length\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    195\u001b[0m                                                    max_position=self.embedding.max_position)\n\u001b[0;32m--> 196\u001b[0;31m             \u001b[0mpred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtf_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mpredict_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    197\u001b[0m             \u001b[0mpred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpred\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    198\u001b[0m             \u001b[0mlengths\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msen\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0msen\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mx_data\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, x, batch_size, verbose, steps, callbacks, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m   1011\u001b[0m         \u001b[0mmax_queue_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmax_queue_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1012\u001b[0m         \u001b[0mworkers\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mworkers\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1013\u001b[0;31m         use_multiprocessing=use_multiprocessing)\n\u001b[0m\u001b[1;32m   1014\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1015\u001b[0m   \u001b[0;32mdef\u001b[0m \u001b[0mreset_metrics\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, model, x, batch_size, verbose, steps, callbacks, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[1;32m    496\u001b[0m         \u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mModeKeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPREDICT\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    497\u001b[0m         \u001b[0msteps\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msteps\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_queue_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmax_queue_size\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 498\u001b[0;31m         workers=workers, use_multiprocessing=use_multiprocessing, **kwargs)\n\u001b[0m\u001b[1;32m    499\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    500\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py\u001b[0m in \u001b[0;36m_model_iteration\u001b[0;34m(self, model, mode, x, y, batch_size, verbose, sample_weight, steps, callbacks, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[1;32m    473\u001b[0m               \u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    474\u001b[0m               \u001b[0mtraining_context\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtraining_context\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 475\u001b[0;31m               total_epochs=1)\n\u001b[0m\u001b[1;32m    476\u001b[0m           \u001b[0mcbks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmake_logs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepoch_logs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    477\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py\u001b[0m in \u001b[0;36mrun_one_epoch\u001b[0;34m(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)\u001b[0m\n\u001b[1;32m    126\u001b[0m         step=step, mode=mode, size=current_batch_size) as batch_logs:\n\u001b[1;32m    127\u001b[0m       \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 128\u001b[0;31m         \u001b[0mbatch_outs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mexecution_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    129\u001b[0m       \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mStopIteration\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOutOfRangeError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    130\u001b[0m         \u001b[0;31m# TODO(kaftan): File bug about tf function and errors.OutOfRangeError?\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py\u001b[0m in \u001b[0;36mexecution_function\u001b[0;34m(input_fn)\u001b[0m\n\u001b[1;32m     96\u001b[0m     \u001b[0;31m# `numpy` translates Tensors to values in Eager mode.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     97\u001b[0m     return nest.map_structure(_non_none_constant_value,\n\u001b[0;32m---> 98\u001b[0;31m                               distributed_function(input_fn))\n\u001b[0m\u001b[1;32m     99\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    100\u001b[0m   \u001b[0;32mreturn\u001b[0m \u001b[0mexecution_function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m    566\u001b[0m         \u001b[0mxla_context\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mExit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    567\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 568\u001b[0;31m       \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    569\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    570\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m    604\u001b[0m       \u001b[0;31m# In this case we have not created variables on the first call. So we can\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    605\u001b[0m       \u001b[0;31m# run the first trace but we should fail if variables are created.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 606\u001b[0;31m       \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateful_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    607\u001b[0m       \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_created_variables\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    608\u001b[0m         raise ValueError(\"Creating variables on a non-first call to a function\"\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   2361\u001b[0m     \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2362\u001b[0m       \u001b[0mgraph_function\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2363\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_filtered_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m  \u001b[0;31m# pylint: disable=protected-access\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2364\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2365\u001b[0m   \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py\u001b[0m in \u001b[0;36m_filtered_call\u001b[0;34m(self, args, kwargs)\u001b[0m\n\u001b[1;32m   1609\u001b[0m          if isinstance(t, (ops.Tensor,\n\u001b[1;32m   1610\u001b[0m                            resource_variable_ops.BaseResourceVariable))),\n\u001b[0;32m-> 1611\u001b[0;31m         self.captured_inputs)\n\u001b[0m\u001b[1;32m   1612\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1613\u001b[0m   \u001b[0;32mdef\u001b[0m \u001b[0m_call_flat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcaptured_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcancellation_manager\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[0;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[1;32m   1690\u001b[0m       \u001b[0;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1691\u001b[0m       return self._build_call_outputs(self._inference_function.call(\n\u001b[0;32m-> 1692\u001b[0;31m           ctx, args, cancellation_manager=cancellation_manager))\n\u001b[0m\u001b[1;32m   1693\u001b[0m     forward_backward = self._select_forward_and_backward_functions(\n\u001b[1;32m   1694\u001b[0m         \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[1;32m    543\u001b[0m               \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    544\u001b[0m               \u001b[0mattrs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"executor_type\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexecutor_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"config_proto\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 545\u001b[0;31m               ctx=ctx)\n\u001b[0m\u001b[1;32m    546\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    547\u001b[0m           outputs = execute.execute_with_cancellation(\n",
      "\u001b[0;32m~/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m     59\u001b[0m     tensors = pywrap_tensorflow.TFE_Py_Execute(ctx._handle, device_name,\n\u001b[1;32m     60\u001b[0m                                                \u001b[0mop_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattrs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 61\u001b[0;31m                                                num_outputs)\n\u001b[0m\u001b[1;32m     62\u001b[0m   \u001b[0;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     63\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "for embed_name, embed in embeddings:\n",
    "    for model_name, MOEDL_CLASS in model_classes:\n",
    "        run_name = f\"{embed_name}-{model_name}\"\n",
    "        model = MOEDL_CLASS(embed, sequence_length=SEQUENCE_LENGTH)\n",
    "        \n",
    "        early_stop = keras.callbacks.EarlyStopping(patience=EARL_STOPPING_PATIENCE)\n",
    "        reduse_lr_callback = keras.callbacks.ReduceLROnPlateau(factor=0.1, \n",
    "                                                               patience=REDUCE_RL_PATIENCE)\n",
    "\n",
    "        eval_callback = EvalCallBack(task_model=model,\n",
    "                                     valid_x=valid_x, \n",
    "                                     valid_y=valid_y,\n",
    "                                     step=1)\n",
    "\n",
    "        tf_board = keras.callbacks.TensorBoard(\n",
    "            log_dir=os.path.join(TF_LOG_FOLDER, run_name), \n",
    "            update_freq=1000\n",
    "        )\n",
    "\n",
    "        callbacks = [early_stop, reduse_lr_callback, eval_callback, tf_board]\n",
    "\n",
    "        model.fit(train_x, \n",
    "                  train_y,\n",
    "                  valid_x,\n",
    "                  valid_y,\n",
    "                  callbacks=callbacks,\n",
    "                  epochs=EPOCHS)\n",
    "\n",
    "        if os.path.exists(LOG_FILE_PATH):\n",
    "            logs = json.load(open(LOG_FILE_PATH, 'r'))\n",
    "        else:\n",
    "            logs = {}\n",
    "        \n",
    "        logs[run_name] = eval_callback.logs\n",
    "        \n",
    "        with open(LOG_FILE_PATH, 'w') as f:\n",
    "            f.write(json.dumps(logs, indent=2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(albert_tiny_489k_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!cat /Users/brikerman/Desktop/kashgari-demo/embeddings/albert_tiny_489k/albert_config_tiny.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n",
      "\u001b[31mERROR: Could not find a version that satisfies the requirement transformer (from versions: none)\u001b[0m\n",
      "\u001b[31mERROR: No matching distribution found for transformer\u001b[0m\n",
      "\u001b[33mWARNING: You are using pip version 20.0.2; however, version 20.1 is available.\n",
      "You should consider upgrading via the '/Users/brikerman/Desktop/python/Kashgari2/venv/bin/python -m pip install --upgrade pip' command.\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "!pip install transformer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.7.4 64-bit ('venv': venv)",
   "language": "python",
   "name": "python37464bitvenvvenv1fddca7c786445709a8f03a12aa91dfc"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
